跳到主要内容

10. Validators

Annotated Validators

使用 Annotated 上的 validator 去验证类型

from typing import Any, List
from typing_extensions import Annotated
from pydantic import BaseModel, ValidationError
from pydantic.functional_validators import AfterValidator

def check_squares(v: int) -> int:
    """Check that ``v`` is a perfect square and return it unchanged.

    Args:
        v: Integer to check.

    Returns:
        ``v`` itself when it is a perfect square.

    Raises:
        AssertionError: If ``v`` is negative or not a perfect square.
    """
    # Local import: the snippet's import header does not bring in ``math``.
    import math

    # The integer square root is exact. ``v**0.5 % 1`` loses precision for
    # large ints (accepting non-squares) and raises TypeError for negative
    # ints, because the power becomes a complex number.
    assert v >= 0 and math.isqrt(v) ** 2 == v, f'{v} is not a square number'
    return v

def double(v: Any) -> Any:
    """Return ``v * 2``; works for any value that supports ``* 2``."""
    return v * 2


# Two after-validators attached through Annotated: the value is doubled first,
# then the doubled value is checked for being a square number.
MyNumber = Annotated[int, AfterValidator(double), AfterValidator(check_squares)]


class DemoModel(BaseModel):
    """Model whose ``number`` items are each validated as ``MyNumber``."""

    number: List[MyNumber]


print(DemoModel(number=[2, 8]))
#> number=[4, 16]
try:
    # 4 doubles to 8, which is not a square number, so validation fails.
    DemoModel(number=[2, 4])
except ValidationError as e:
    print(e)
    """
    1 validation error for DemoModel
    number
      Assertion failed, 8 is not a square number
    assert ((8 ** 0.5) % 1) == 0 [type=assertion_error, input_value=4, input_type=int]
    """

Before, After, Wrap and Plain validators

  • After validators 在 Pydantic 内部解析之后运行. They are generally more type safe and thus easier to implement.
  • Before validators 在内部解析之前运行. These are more flexible than After validators since they can modify the raw input, but they also have to deal with the raw input, which in theory could be any arbitrary object.
  • Plain validators are like a mode='before' validator 但它立即终止验证,不会调用任何其他验证器,pydantic 也不会调用它内部的验证逻辑
  • Wrap validators 是最复杂的. You can run code before or after Pydantic and other validators do their thing or you can terminate validation immediately, both with a successful value or an error. 你能添加多个 before, after, or mode='wrap' validators, 但 PlainValidator 只能有一个 since a plain validator will not call any inner validators. Here's an example of a mode='wrap' validator:
import json
from typing import Any, List

from typing_extensions import Annotated

from pydantic import (
BaseModel,
ValidationError,
ValidationInfo,
ValidatorFunctionWrapHandler,
)
from pydantic.functional_validators import WrapValidator


def maybe_strip_whitespace(
    v: Any, handler: ValidatorFunctionWrapHandler, info: ValidationInfo
) -> int:
    """Wrap validator that retries with stripped whitespace in JSON mode.

    Args:
        v: Raw input value.
        handler: Callable that runs the inner validation on a value.
        info: Validation info; ``info.mode`` selects the branch taken.

    Returns:
        In JSON mode, whatever ``handler`` returns; in Python mode, ``v``
        itself with no further validation.

    Raises:
        AssertionError: If the input type does not match the mode (``str``
            for JSON, ``int`` for Python) or the mode is neither of the two.
        ValidationError: If the retry with the stripped string also fails.
    """
    if info.mode == 'json':
        assert isinstance(v, str), 'In JSON mode the input must be a string!'
        # you can call the handler multiple times
        try:
            return handler(v)
        except ValidationError:
            # Second attempt: same inner validation on the stripped string.
            return handler(v.strip())
    assert info.mode == 'python'
    assert isinstance(v, int), 'In Python mode the input must be an int!'
    # do no further validation
    return v


# Every list item is routed through the wrap validator.
MyNumber = Annotated[int, WrapValidator(maybe_strip_whitespace)]


class DemoModel(BaseModel):
    """Model whose ``number`` items go through ``maybe_strip_whitespace``."""

    number: List[MyNumber]


print(DemoModel(number=[2, 8]))
#> number=[2, 8]
# JSON mode: ' 2 ' is retried after stripping whitespace.
print(DemoModel.model_validate_json(json.dumps({'number': [' 2 ', '8']})))
#> number=[2, 8]
try:
    # Python mode only accepts real ints, so the string '2' is rejected.
    DemoModel(number=['2'])
except ValidationError as e:
    print(e)
    """
    1 validation error for DemoModel
    number
      Assertion failed, In Python mode the input must be an int!
    assert False
    + where False = isinstance('2', int) [type=assertion_error, input_value='2', input_type=str]
    """

The same "modes" apply to @field_validator, which is discussed in the next section.

Annotated 中 validator 的顺序

Annotated 中 validator 的顺序至关重要。验证从右到左,然后返回

即:从右到左运行所有 before validators(或者 wrap),然后再从左到右运行所有 after validators

That is, it goes from right to left running all "before" validators (or calling into "wrap" validators), then left to right back out calling all "after" validators.

from typing import Any, Callable, List, cast

from typing_extensions import Annotated, TypedDict

from pydantic import (
AfterValidator,
BaseModel,
BeforeValidator,
PlainValidator,
ValidationInfo,
ValidatorFunctionWrapHandler,
WrapValidator,
)
from pydantic.functional_validators import field_validator


class Context(TypedDict):
    """Typed dict holding a ``logs`` list of strings."""

    logs: List[str]


def make_validator(label: str) -> Callable[[str, ValidationInfo], str]:
    """Build a validator that logs ``label`` and passes the value through.

    Args:
        label: Text appended to ``info.context['logs']`` on every call.

    Returns:
        A ``(value, info)`` validator function that returns ``value``
        unchanged.
    """

    def validator(v: Any, info: ValidationInfo) -> Any:
        # The context is expected to be a Context dict with a 'logs' list.
        context = cast(Context, info.context)
        context['logs'].append(label)
        return v

    return validator


def make_wrap_validator(
    label: str,
) -> Callable[[str, ValidatorFunctionWrapHandler, ValidationInfo], str]:
    """Build a wrap validator that logs before and after the inner handler.

    Args:
        label: Prefix for the two log entries, ``'<label>: pre'`` and
            ``'<label>: post'``.

    Returns:
        A ``(value, handler, info)`` validator function that returns whatever
        ``handler(value)`` returns.
    """

    def validator(
        v: Any, handler: ValidatorFunctionWrapHandler, info: ValidationInfo
    ) -> Any:
        # The context is expected to be a Context dict with a 'logs' list.
        context = cast(Context, info.context)
        context['logs'].append(f'{label}: pre')
        result = handler(v)
        # Only reached when the handler did not raise.
        context['logs'].append(f'{label}: post')
        return result

    return validator


class A(BaseModel):
    """Model used to trace the order in which validators run.

    Every validator only appends its label to the validation context's
    ``logs`` list. ``x`` mixes before/after/wrap validators; ``y`` has the
    same mix plus a ``PlainValidator`` in the middle.
    """

    x: Annotated[
        str,
        BeforeValidator(make_validator('before-1')),
        AfterValidator(make_validator('after-1')),
        WrapValidator(make_wrap_validator('wrap-1')),
        BeforeValidator(make_validator('before-2')),
        AfterValidator(make_validator('after-2')),
        WrapValidator(make_wrap_validator('wrap-2')),
        BeforeValidator(make_validator('before-3')),
        AfterValidator(make_validator('after-3')),
        WrapValidator(make_wrap_validator('wrap-3')),
        BeforeValidator(make_validator('before-4')),
        AfterValidator(make_validator('after-4')),
        WrapValidator(make_wrap_validator('wrap-4')),
    ]
    y: Annotated[
        str,
        BeforeValidator(make_validator('before-1')),
        AfterValidator(make_validator('after-1')),
        WrapValidator(make_wrap_validator('wrap-1')),
        BeforeValidator(make_validator('before-2')),
        AfterValidator(make_validator('after-2')),
        WrapValidator(make_wrap_validator('wrap-2')),
        PlainValidator(make_validator('plain')),
        BeforeValidator(make_validator('before-3')),
        AfterValidator(make_validator('after-3')),
        WrapValidator(make_wrap_validator('wrap-3')),
        BeforeValidator(make_validator('before-4')),
        AfterValidator(make_validator('after-4')),
        WrapValidator(make_wrap_validator('wrap-4')),
    ]

    # Decorator-style validators, applied by calling field_validator directly.
    val_x_before = field_validator('x', mode='before')(
        make_validator('val_x before')
    )
    val_x_after = field_validator('x', mode='after')(
        make_validator('val_x after')
    )
    val_y_wrap = field_validator('y', mode='wrap')(
        make_wrap_validator('val_y wrap')
    )


# Fresh context; every validator on A appends its label to context['logs'].
context = Context(logs=[])

# The context reaches the validators through the `context` keyword argument.
A.model_validate({'x': 'abc', 'y': 'def'}, context=context)
print(context['logs'])
# Expected output, kept as a bare string literal (it is never used).
# For y, the log shows no entry for the *-1 / *-2 validators listed to the
# left of PlainValidator.
"""
[
'val_x before',
'wrap-4: pre',
'before-4',
'wrap-3: pre',
'before-3',
'wrap-2: pre',
'before-2',
'wrap-1: pre',
'before-1',
'after-1',
'wrap-1: post',
'after-2',
'wrap-2: post',
'after-3',
'wrap-3: post',
'after-4',
'wrap-4: post',
'val_x after',
'val_y wrap: pre',
'wrap-4: pre',
'before-4',
'wrap-3: pre',
'before-3',
'plain',
'after-3',
'wrap-3: post',
'after-4',
'wrap-4: post',
'val_y wrap: post',
]
"""

默认值的验证

如果字段使用的是默认值(即创建 model 时没有显式传入该字段),那么 validators 不会运行(无论是 @field_validator validators 还是 Annotated validators)。显式传入的值即使与默认值相同,也仍然会被验证。

使用 Field(validate_default=True) 来强行开启默认值验证;不过,更好的方法还是使用 @model_validator(mode='before') where the function is called before the inner validator is called.

from typing_extensions import Annotated

from pydantic import BaseModel, Field, field_validator


class Model(BaseModel):
    """Model contrasting a plain default with ``validate_default=True``."""

    # Plain default: not passed through the validator when left unset.
    x: str = 'abc'
    # validate_default=True makes the default go through the validator too.
    y: Annotated[str, Field(validate_default=True)] = 'xyz'

    @field_validator('x', 'y')
    @classmethod
    def double(cls, v: str) -> str:
        """Return the string repeated twice."""
        return v * 2


# Nothing passed: x keeps its raw default, y's default is validated (doubled).
print(Model())
#> x='abc' y='xyzxyz'
# An explicitly passed x is validated.
print(Model(x='foo'))
#> x='foofoo' y='xyzxyz'
# Passing a value equal to the default still counts as explicit input.
print(Model(x='abc'))
#> x='abcabc' y='xyzxyz'
# Both fields passed explicitly: both are validated.
print(Model(x='foo', y='bar'))
#> x='foofoo' y='barbar'

Field validators

使用 @field_validator decorator 来验证指定字段

from pydantic import (
BaseModel,
ValidationError,
ValidationInfo,
field_validator,
)


class UserModel(BaseModel):
    """User model demonstrating ``@field_validator`` on one and many fields."""

    name: str
    id: int

    @field_validator('name')
    @classmethod
    def name_must_contain_space(cls, v: str) -> str:
        """Require a space in the name and return it title-cased.

        Raises:
            ValueError: If ``v`` contains no space.
        """
        if ' ' not in v:
            raise ValueError('must contain a space')
        return v.title()

    # you can select multiple fields, or use '*' to select all fields
    @field_validator('id', 'name')
    @classmethod
    def check_alphanumeric(cls, v: str, info: ValidationInfo) -> str:
        """Require string values to be alphanumeric, ignoring spaces.

        Non-string values (this validator is also registered for ``id``) are
        returned without any check.

        Raises:
            AssertionError: If a string value has non-alphanumeric characters.
        """
        if isinstance(v, str):
            # info.field_name is the name of the field being validated
            is_alphanumeric = v.replace(' ', '').isalnum()
            assert is_alphanumeric, f'{info.field_name} must be alphanumeric'
        return v


print(UserModel(name='John Doe', id=1))
#> name='John Doe' id=1

try:
    # No space in the name: rejected by name_must_contain_space.
    UserModel(name='samuel', id=1)
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    name
      Value error, must contain a space [type=value_error, input_value='samuel', input_type=str]
    """

try:
    # 'abc' cannot be parsed as an int.
    UserModel(name='John Doe', id='abc')
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    id
      Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='abc', input_type=str]
    """

try:
    # '!' is not alphanumeric: rejected by check_alphanumeric.
    UserModel(name='John Doe!', id=1)
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
    name
      Assertion failed, name must be alphanumeric
    assert False [type=assertion_error, input_value='John Doe!', input_type=str]
    """
  • @field_validator 是类方法(class methods),所以它的首个参数是个 UserModel 类,而不是 UserModel 实例,建议在其下方添加 @classmethod 装饰器以获得正确的 type checking
  • 第二个参数就是字段的值了
  • 第三个参数是一个 pydantic.ValidationInfo 实例
  • validators 要不 return 一个处理过的值,要不 raise a ValueError or AssertionError (可能会用到 assert)
  • 单个 validator 能覆盖到多个字段,只要在装饰器里提供多个字段名即可(使用 '*' 能覆盖到所有字段)

If you make use of assert statements, keep in mind that running Python with the -O optimization flag disables assert statements, and validators will stop working.

如果你想在 @field_validator 中拿到其他字段的信息,可以使用 ValidationInfo.data,它是一个 field name -> field value 的字典. 字段的验证会按照字段定义的顺序进行,所以访问 ValidationInfo.data 里的数据要小心了。for example, you would not be able to access info.data['id'] from within name_must_contain_space. 在大部分情况下,使用 @model_validator 来处理多字段是更好的选择

model validators

使用 @model_validator 来对整个 model 起验证作用

from typing import Any
from typing_extensions import Self
from pydantic import BaseModel, ValidationError, model_validator


class UserModel(BaseModel):
    """User model demonstrating before- and after-mode model validators."""

    username: str
    password1: str
    password2: str

    @model_validator(mode='before')
    @classmethod
    def check_card_number_omitted(cls, data: Any) -> Any:
        """Reject dict input that contains a ``card_number`` key.

        Input that is not a dict is passed through without any check.

        Raises:
            AssertionError: If ``data`` is a dict containing ``card_number``.
        """
        if isinstance(data, dict):
            assert (
                'card_number' not in data
            ), 'card_number should not be included'
        return data

    @model_validator(mode='after')
    def check_passwords_match(self) -> Self:
        """Require ``password1`` and ``password2`` to be equal.

        Returns:
            The model instance itself.

        Raises:
            ValueError: If both passwords are set and they differ.
        """
        pw1 = self.password1
        pw2 = self.password2
        if pw1 is not None and pw2 is not None and pw1 != pw2:
            raise ValueError('passwords do not match')
        return self


print(UserModel(username='scolvin', password1='zxcvbn', password2='zxcvbn'))
#> username='scolvin' password1='zxcvbn' password2='zxcvbn'
try:
    # Different passwords: rejected by the after-mode model validator.
    UserModel(username='scolvin', password1='zxcvbn', password2='zxcvbn2')
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
      Value error, passwords do not match [type=value_error, input_value={'username': 'scolvin', '... 'password2': 'zxcvbn2'}, input_type=dict]
    """

try:
    # Extra 'card_number' key: rejected by the before-mode model validator.
    UserModel(
        username='scolvin',
        password1='zxcvbn',
        password2='zxcvbn',
        card_number='1234',
    )
except ValidationError as e:
    print(e)
    """
    1 validation error for UserModel
      Assertion failed, card_number should not be included
    assert 'card_number' not in {'card_number': '1234', 'password1': 'zxcvbn', 'password2': 'zxcvbn', 'username': 'scolvin'} [type=assertion_error, input_value={'username': 'scolvin', '..., 'card_number': '1234'}, input_type=dict]
    """

Model validators 可以是 mode='before'、mode='after' 或 mode='wrap'

Before model validators 接收到的输入通常是一个 dict[str, Any],不过也可以是个 model 实例 (e.g. if UserModel.model_validate(UserModel.construct(...)) is called),或者其他任何乱七八糟的东西(取决于你在 model_validate 传入了啥)。得益于此,mode='before' validators 异常强大,但也异常复杂、容易出错。Before model validators 需被定义为 class method:

  • 第一个参数是 cls
  • 第二个参数就是 input (建议定义为 Any 类型,然后用 isinstance 去具体地判断类型)
  • 第三个参数 (if present) 是 pydantic.ValidationInfo 实例

After model validators 是 instance methods,接收一个 model instance 作为第一个参数(self),并且必须在最后返回该实例(Self)。Since these are fully type safe,它们比 mode='before' validators 更容易实现。如果有任何字段验证出错了,mode='after' validators for that field will not be called.

在 validators 处理错误

你可以在 validator 中 raise either a ValueError or AssertionError (including ones generated by assert ... statements);也可以选择 raise a PydanticCustomError,少一点简洁,多一点灵活。任何其他错误 (including TypeError) 都会冒出来,不会被封装在 ValidationError

from pydantic_core import PydanticCustomError
from pydantic import BaseModel, ValidationError, field_validator

class Model(BaseModel):
    """Model demonstrating a validator that raises ``PydanticCustomError``."""

    x: int

    @field_validator('x')
    @classmethod
    def validate_x(cls, v: int) -> int:
        """Reject multiples of 42 with a custom error; return other values.

        Raises:
            PydanticCustomError: Of type ``the_answer_error`` when
                ``v % 42 == 0``.
        """
        if v % 42 == 0:
            raise PydanticCustomError(
                'the_answer_error',
                '{number} is the answer!',
                {'number': v},
            )
        return v


try:
    # 84 is a multiple of 42, so the custom error is raised.
    Model(x=42 * 2)
except ValidationError as e:
    print(e)
    """
    1 validation error for Model
    x
      84 is the answer! [type=the_answer_error, input_value=84, input_type=int]
    """

特殊类型

Pydantic 提供了一些特殊类型来自定义验证过程

  • InstanceOf 类型能够验证 value 是否是给定类的实例
from typing import List
from pydantic import BaseModel, InstanceOf, ValidationError


class Fruit:
    """Base class whose repr is simply the concrete class name."""

    def __repr__(self):
        return self.__class__.__name__


class Banana(Fruit):
    """A kind of Fruit; adds nothing of its own."""

    ...


class Apple(Fruit):
    """A kind of Fruit; adds nothing of its own."""

    ...

class Basket(BaseModel):
    """Model whose ``fruits`` items must be instances of ``Fruit``."""

    fruits: List[InstanceOf[Fruit]]


print(Basket(fruits=[Banana(), Apple()]))
#> fruits=[Banana, Apple]
try:
    # The string 'Apple' is not a Fruit instance.
    Basket(fruits=[Banana(), 'Apple'])
except ValidationError as e:
    print(e)
    """
    1 validation error for Basket
    fruits
      Input should be an instance of Fruit [type=is_instance_of, input_value='Apple', input_type=str]
    """
from typing import List
from pydantic import BaseModel, SkipValidation


# SkipValidation example: items annotated with SkipValidation[str] are not
# validated, as the second call shows (the int 123 is accepted as-is).
class Model(BaseModel):
    """Model whose ``names`` items skip validation."""

    names: List[SkipValidation[str]]


m = Model(names=['foo', 'bar'])
print(m)
#> names=['foo', 'bar']

m = Model(names=['foo', 123])
print(m)
#> names=['foo', 123]

Field checks

在类创建过程中,validators 会被检查以确认它们指定的字段实际上存在于 model 中。

这个特性在一些场合可能是不被需要的,比如你的 validators 是用来验证仅出现在 model 子类中的字段

如果您希望在类创建过程中禁用这些检查,您可以将 check_fields=False 作为关键字参数传递给 validators。

Dataclass validators

Validators also work with Pydantic dataclasses.

from pydantic import field_validator
from pydantic.dataclasses import dataclass


@dataclass
class DemoDataclass:
    """Pydantic dataclass with a before-mode validator on ``product_id``."""

    product_id: str  # should be a five-digit string, may have leading zeros

    @field_validator('product_id', mode='before')
    @classmethod
    def convert_int_serial(cls, v):
        """Turn an int into a string zero-padded to at least five characters.

        Values that are not ints are returned unchanged.
        """
        if isinstance(v, int):
            v = str(v).zfill(5)
        return v


# A string input is kept as given.
print(DemoDataclass(product_id='01234'))
#> DemoDataclass(product_id='01234')
# An int input is converted by the before-mode validator (zero-padded string).
print(DemoDataclass(product_id=2468))
#> DemoDataclass(product_id='02468')

Validation Context

你可以向 validation methods(例如 model_validate)传入一个 context 关键字参数,validator 函数可以通过 ValidationInfo 参数的 context 属性拿到它

from pydantic import BaseModel, ValidationInfo, field_validator

class Model(BaseModel):
    """Model whose ``text`` validator reads stopwords from the context."""

    text: str

    @field_validator('text')
    @classmethod
    def remove_stopwords(cls, v: str, info: ValidationInfo):
        """Drop words listed in the context's ``stopwords`` from ``v``.

        Words are compared lower-cased. With no (or an empty) context the
        text is returned unchanged.
        """
        context = info.context
        if context:
            stopwords = context.get('stopwords', set())
            # Re-joining on single spaces also collapses runs of whitespace.
            v = ' '.join(w for w in v.split() if w.lower() not in stopwords)
        return v


# Same input for all three calls; only the validation context changes.
data = {'text': 'This is an example document'}
print(Model.model_validate(data)) # no context
#> text='This is an example document'
# Stopwords are matched against lower-cased words, so 'This' is removed too.
print(Model.model_validate(data, context={'stopwords': ['this', 'is', 'an']}))
#> text='example document'
print(Model.model_validate(data, context={'stopwords': ['document']}))
#> text='This is an example'

这在您需要在运行时动态更新验证行为时非常有用。例如,如果您希望一个字段具有动态可控的一组允许值,可以通过上下文传递允许的值,并有一个单独的机制来更新允许的内容。

from typing import Any, Dict, List

from pydantic import (
BaseModel,
ValidationError,
ValidationInfo,
field_validator,
)

# Module-level list of currently allowed values; replaced wholesale by
# set_allowed_choices() and read by get_context().
_allowed_choices = ['a', 'b', 'c']


def set_allowed_choices(allowed_choices: List[str]) -> None:
    """Replace the module-level list of allowed choices."""
    global _allowed_choices
    _allowed_choices = allowed_choices


def get_context() -> Dict[str, Any]:
    """Return a context dict exposing the current allowed choices."""
    return {'allowed_choices': _allowed_choices}


class Model(BaseModel):
    """Model whose ``choice`` is restricted by the validation context."""

    choice: str

    @field_validator('choice')
    @classmethod
    def validate_choice(cls, v: str, info: ValidationInfo):
        """Check ``v`` against the context's ``allowed_choices``, if any.

        With no context, or no (or empty) ``allowed_choices`` in it, every
        value is accepted.

        Raises:
            ValueError: If ``allowed_choices`` is non-empty and does not
                contain ``v``.
        """
        # info.context is None when validation runs without a context; fall
        # back to an empty dict so that case means "no restriction" instead
        # of an AttributeError on .get().
        context = info.context or {}
        allowed_choices = context.get('allowed_choices')
        if allowed_choices and v not in allowed_choices:
            raise ValueError(f'choice must be one of {allowed_choices}')
        return v


print(Model.model_validate({'choice': 'a'}, context=get_context()))
#> choice='a'

try:
    # 'd' is not among the initially allowed choices.
    print(Model.model_validate({'choice': 'd'}, context=get_context()))
except ValidationError as exc:
    print(exc)
    """
    1 validation error for Model
    choice
      Value error, choice must be one of ['a', 'b', 'c'] [type=value_error, input_value='d', input_type=str]
    """

# Narrow the allowed values at runtime; contexts built afterwards see it.
set_allowed_choices(['b', 'c'])

try:
    # 'a' was valid before the update and is rejected now.
    print(Model.model_validate({'choice': 'a'}, context=get_context()))
except ValidationError as exc:
    print(exc)
    """
    1 validation error for Model
    choice
      Value error, choice must be one of ['b', 'c'] [type=value_error, input_value='a', input_type=str]
    """

Similarly, you can use a context for serialization.

Using validation context with BaseModel initialization

Although there is no way to specify a context in the standard BaseModel initializer, you can work around this through the use of contextvars.ContextVar and a custom __init__ method:

from contextlib import contextmanager
from contextvars import ContextVar
from typing import Any, Dict, Iterator
from pydantic import BaseModel, ValidationInfo, field_validator

# Holds the validation context for models created inside init_context();
# None when no context is active.
_init_context_var = ContextVar('_init_context_var', default=None)


@contextmanager
def init_context(value: Dict[str, Any]) -> Iterator[None]:
    """Set ``value`` as the init-time validation context for the ``with`` body.

    The previous value of the context variable is restored on exit, including
    when the body raises.
    """
    token = _init_context_var.set(value)
    try:
        yield
    finally:
        _init_context_var.reset(token)

class Model(BaseModel):
    """Model whose initializer forwards a context taken from a ContextVar."""

    my_number: int

    def __init__(self, /, **data: Any) -> None:
        """Validate ``data`` onto ``self`` with the current init context.

        The context is whatever ``_init_context_var`` currently holds.
        """
        self.__pydantic_validator__.validate_python(
            data,
            self_instance=self,
            context=_init_context_var.get(),
        )

    @field_validator('my_number')
    @classmethod
    def multiply_with_context(cls, value: int, info: ValidationInfo) -> int:
        """Multiply ``value`` by the context's ``multiplier`` (default 1).

        With no (or an empty) context the value is returned unchanged.
        """
        if info.context:
            multiplier = info.context.get('multiplier', 1)
            value = value * multiplier
        return value


# Outside init_context() there is no context, so the value is unchanged.
print(Model(my_number=2))
#> my_number=2

with init_context({'multiplier': 3}):
    print(Model(my_number=2))
    #> my_number=6

# After the with-block the context is gone again.
print(Model(my_number=2))
#> my_number=2

复用 Validators

The following approach demonstrates how you can reuse a validator so that redundancy is minimized and the models become again almost declarative.

from pydantic import BaseModel, field_validator


def normalize(name: str) -> str:
    """Capitalize every space-separated word of ``name``.

    Splitting on a single space keeps runs of spaces intact.
    """
    return ' '.join(word.capitalize() for word in name.split(' '))


class Producer(BaseModel):
    """Model whose ``name`` is passed through the shared ``normalize``."""

    name: str

    # Reuse the module-level function as this model's validator for 'name'.
    _normalize_name = field_validator('name')(normalize)


class Consumer(BaseModel):
    """Model whose ``name`` is passed through the shared ``normalize``."""

    name: str

    # Reuse the module-level function as this model's validator for 'name'.
    _normalize_name = field_validator('name')(normalize)


# Both models apply the same normalize() function to their 'name' field.
jane_doe = Producer(name='JaNe DOE')
print(repr(jane_doe))
#> Producer(name='Jane Doe')
john_doe = Consumer(name='joHN dOe')
print(repr(john_doe))
#> Consumer(name='John Doe')